from __future__ import division, print_function  # must be the first statement of the module

# stdlib
import csv

# third-party: scientific stack
import numpy as np
import pandas as pd
import scipy as sp
import scipy.cluster.hierarchy as shc
from scipy import ndimage, misc
from scipy import signal
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.signal import chirp, find_peaks, peak_widths

# third-party: plotting
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt  # NOTE(review): duplicate import kept (harmless)
from matplotlib import cm
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.colors import LightSource
from mpl_toolkits import mplot3d
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly
import plotly.plotly as py
import plotly.graph_objs as go

# third-party: clustering / ML
import skfuzzy as fuzz
import xlsxwriter
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
from sklearn.mixture import GaussianMixture as GMM
from sklearn.preprocessing import StandardScaler
# Load the half-hourly usage CSV; the first field of every row is a house id.
reader2 = csv.reader(open('data_sample4008.csv', "r"), delimiter = ",")
houses = list(reader2)
df_houses = pd.DataFrame(houses)
# Collect houses 1-4 that have a complete year (365 rows) of data,
# keyed by house id.  (This cell was collapsed onto a single line,
# which is a syntax error in plain Python; reformatted.)
Houses_array = {}
for i in range(1, 5):
    house = extract_house2(i)
    if len(house) == 365:
        selected_house = Multiple_data_format(house)
        Houses_array[i] = selected_house
# House ids (1..199) that have a complete year of 365 daily records.
Houses_array = [i for i in range(1, 200) if len(extract_house2(i)) == 365]
def extract_house2(num, data=None):
    """Return all CSV rows belonging to house *num*.

    Parameters
    ----------
    num : int
        House id; matched against the first field of each row (a string).
    data : list of rows, optional
        Rows to search.  Defaults to the module-level ``houses`` list, so
        existing call sites are unchanged; passing it explicitly makes the
        function usable (and testable) without the global.

    Returns
    -------
    list
        Every row whose first field equals ``str(num)``.
    """
    rows = houses if data is None else data
    key = str(num)
    return [row for row in rows if row[0] == key]
def Multiple_data_format(array):  # text file into dataframe
    """Convert raw rows to a float matrix: drop column 1, cast, transpose."""
    frame = pd.DataFrame(array)
    frame.pop(1)  # discard the second field of every row
    return frame.astype(float).transpose()
def data_format(array):  # text file into dataframe
    """Convert raw rows to a float matrix: keep fields 2+, cast, transpose."""
    frame = pd.DataFrame(array)
    usage = frame.iloc[:, 2:].astype(float)
    return usage.transpose()
def show_pos(array):
    """Clamp negative readings to zero, preserving order and length."""
    return [0 if value < 0 else value for value in array]
def reshaping(df):
    """Flatten a (48 x days) frame into one 1-D, day-major usage array."""
    transposed = df.reset_index(drop=True).transpose()
    values = transposed.values
    # One row per day after the transpose; 48 half-hour slots per day.
    return values.reshape(len(values) * 48)
def data_format(array): #text file into dataframe
    """Keep fields 2+ of each raw row, cast to float, and transpose.

    NOTE(review): exact duplicate of the data_format defined earlier in
    this file (flattened-notebook artifact); this re-definition shadows it.
    """
    df = pd.DataFrame(array)
    matrix = df.iloc[:,2:]
    matrix = matrix.astype(float)
    matrix = matrix.transpose()
    return matrix
def show_pos (array):
    """Replace negative readings with 0; non-negative values pass through.

    NOTE(review): exact duplicate of the show_pos defined earlier in this
    file; this re-definition shadows it.
    """
    array_new = []
    for i in array:
        if i < 0:
            i = 0
            array_new.append(i)
        else:
            array_new.append(i)
    return array_new
def reshaping(df):
    """Transpose a (48 x days) frame and flatten it into a 1-D array.

    NOTE(review): exact duplicate of the reshaping defined earlier in this
    file; this re-definition shadows it.
    """
    df2 = df.reset_index(drop=True)
    df2 = df2.transpose()
    df2_array = df2.values
    shape = len(df2_array)*48
    reshaped_usage = df2_array.reshape(shape)
    return reshaped_usage
def extract_house2(num, data=None):
    """Return all CSV rows belonging to house *num*.

    Duplicate definition (flattened-notebook artifact) kept in place so
    the file's cell order is preserved; behavior matches the earlier one.

    Parameters
    ----------
    num : int
        House id; matched against the first field of each row (a string).
    data : list of rows, optional
        Rows to search; defaults to the module-level ``houses`` list, so
        existing call sites are unchanged.
    """
    rows = houses if data is None else data
    key = str(num)
    return [row for row in rows if row[0] == key]
def SLmin(array, w):
    """Sign-flipped sliding-window minimum of *array* with window *w*.

    The input is zero-padded by (w-1)//2 on each side so the output has
    the same length as the input; each output value is -min(window).
    """
    pad = [0] * ((w - 1) // 2)
    padded = pad + array + pad
    return [float(-min(padded[i:i + w]))
            for i in range(len(padded) - w + 1)]
def LAmin(min_array, w):
    """Second-pass sign-flipped sliding-window minimum of an SLmin series.

    Same padding/window scheme as SLmin, applied to its output, so the
    result again has the same length as the input.
    """
    pad = [0] * ((w - 1) // 2)
    padded = pad + min_array + pad
    return [float(-min(padded[i:i + w]))
            for i in range(len(padded) - w + 1)]
def spike_detection(X, w):
    """Run the two-stage min filter (SLmin then LAmin) at odd widths.

    Widths 3, 5, ..., w+2 are evaluated; the caller passes w=15 so all
    eight widths 3..17 are produced.  Returns the eight LAmin series for
    widths 3..17 as a tuple, matching the original return shape.
    """
    responses = {}
    for width in range(3, w + 3, 2):
        responses[width] = LAmin(SLmin(X, width), width)
    return (responses[3], responses[5], responses[7], responses[9],
            responses[11], responses[13], responses[15], responses[17])
def comp_R(pre, cur):  # Compute R
    """Element-wise difference pre[i] - cur[i] of two equal-length series."""
    return [p - c for p, c in zip(pre, cur)]
def spike_s(X, LAmin):
    """Spike signal: X minus the smoothed minimum, rounded to 3 decimals."""
    return [float(round(x - m, 3)) for x, m in zip(X, LAmin)]
def Height(R, w):  # Height
    """Keep only peak values of R; everything else becomes 0.

    Peaks must be at least (w+1)//2 samples apart (scipy find_peaks
    ``distance``); returns a plain list the same length as R.
    """
    min_distance = (w + 1) // 2
    values = np.array(R)
    peaks, _ = find_peaks(values, distance=min_distance)
    out = [0] * len(R)
    for peak in peaks:
        out[peak] = values[peak]
    return out
def filter_(data):
    """Zero out entries below 0.1 (mutates *data* in place) and return it."""
    data[:] = [0 if value < 0.1 else value for value in data]
    return data
#### Time of day
# Half-hour slot labels 1..48, repeated for every day of the year.
# (The original built this with i % 49, which is the identity for 1..48.)
time_of_day_ = list(range(1, 49))
time_of_day = time_of_day_ * 365
### Take i > 0.1
def non_zero(df):
    """Drop all rows whose 'Height' column is exactly zero."""
    mask = df['Height'] != 0
    return df[mask]
def colormap(matrix):
    """Render *matrix* as a pseudocolor heat map and display it.

    The y-axis is inverted so row 0 appears at the top; the figure is
    closed after showing.  Returns None.
    """
    plt.pcolor(matrix)
    plt.gca().invert_yaxis()
    plt.show()
    plt.close()
def colormap_house(num):
    """Display a heat map of the full year of usage for house *num*.

    NOTE(review): colormap() returns None, so ``map_`` (and this
    function's return value) is always None.
    """
    array = extract_house2(num)
    df = data_format(array)
    map_ = colormap(df)
    return map_
def filt_heat_map(array):
    """Reshape a flat day-major array into a (48 x days) DataFrame."""
    days = len(array) // 48
    grid = np.reshape(array, (days, 48))
    return pd.DataFrame(grid).transpose()
### Hierarchical Clustering (dendrogram)
def hierar_den_single(df):
    """Plot a single-linkage hierarchical-clustering dendrogram of *df*.

    Fix: the linkage method was misspelled 'sinle', which makes
    scipy.cluster.hierarchy.linkage raise ValueError on every call;
    the valid method name is 'single'.
    """
    z = linkage(df, 'single')
    plt.figure(figsize=(50, 7))
    dendrogram(z,
               leaf_rotation=90.,
               leaf_font_size=8
               )
    plt.show()  # pos_3
def hierar_den2(df):
    """Plot a Ward-linkage hierarchical-clustering dendrogram of *df*."""
    z = linkage(df,'ward')
    plt.figure(figsize=(50,7))
    dendrogram(z,
               leaf_rotation=90.,
               leaf_font_size=8
               )
    plt.show() # pos_3
def scatt_hiera(df,n):
    """Agglomerative (Ward) clustering of *df* into *n* clusters, then a
    seaborn scatter of time_of_day vs Height colored by cluster.

    Side effect: adds a 'clusters' column to the caller's *df*.
    NOTE(review): the ``affinity`` keyword and positional x/y arguments
    to sns.lmplot are deprecated in recent sklearn/seaborn releases —
    confirm the installed versions.
    """
    cluster = AgglomerativeClustering(n_clusters=n, affinity='euclidean', linkage='ward')
    cluster.fit_predict(df)
    clmns = ['Height', 'time_of_day','resolution']
    labels = cluster.labels_
    #Glue back to originaal data
    df['clusters'] = labels
    #Add the column into our list
    clmns.extend(['clusters'])
    #print (df[clmns].groupby(['clusters']).mean())
    plt.figure(figsize=(30,20))
    sns.lmplot('time_of_day', 'Height',
               data=df,
               fit_reg=False,
               hue="clusters",
               scatter_kws={"marker": "D",
                            "s": 10})
    plt.title('time_of_day vs Height')
    plt.xlabel('time_of_day')
    plt.ylabel('Height')
    plt.show()
    plt.close()
def print_filt_heat_map(array):
    """Reshape a flat full-year array into (48 x 365) and heat-map it.

    NOTE(review): colormap() returns None, so this function always
    returns None.
    """
    shaped_apr = np.reshape(array,(365,48))
    df_sp = pd.DataFrame(shaped_apr)
    df2 = df_sp.transpose()
    colormap_ = colormap(df2)
    return colormap_
def range_heat(df, n):
    """Map cluster *n*'s normalized time_of_day values back to slot indices.

    Inverts the /24 normalization used when the features were built.
    """
    members = df.loc[df['clusters'] == n]
    return [round(t * (48 / 2)) for t in members['time_of_day']]
def find_time(df, n, heat_df):
    """Return (days, time-slots) at which cluster *n* has spikes.

    Parameters: *df* is the clustered feature frame, *n* the cluster id,
    *heat_df* a (slots x days) spike-height frame.

    Fixes: range_heat() returns a single list, but the old code tried to
    unpack it into three names (``q, w, rows``), raising ValueError on
    every call.  The day loop also reused the name ``i`` bound by
    np.where, which only worked by accident; the indices are now named
    explicitly.
    """
    rows = range_heat(df, n)
    rows_no = np.unique(rows)
    # Columns (days) where any spike height exceeds the threshold.
    _, col_idx = np.where(heat_df.values > 0.00001)
    cols = [heat_df.columns[j] for j in col_idx]
    col_no = np.unique(cols)
    col_no.sort()
    #print('days:', col_no,'time',rows_no)
    return col_no, rows_no
# df_frame for heatmap
# Blank 48-slot x 365-day grid used as the template for spike heat maps.
df_test = pd.DataFrame(index=range(48), columns=range(365)).fillna(0)
def filter_height(array, f):
    """Zero out entries below threshold *f* (mutates in place); return it."""
    array[:] = [0 if value < f else value for value in array]
    return array
def Generate_clustering_features(shaped_array_26,n):
    """Full spike-extraction pipeline for one season's flattened usage array.

    Runs the two-stage min filter at odd widths 3..17, isolates each
    width's contribution via successive differences, keeps only peak
    heights above 0.1, normalizes per width via Nm_cluster_feature
    (``n`` is its height threshold), and returns:

      - Nm_df_: concatenation of the non-zero feature rows of all widths
      - sum_spike: (48 x days) frame summing spike heights over all widths
    """
    shaped_array = shaped_array_26
    # Two-stage sliding-minimum responses for every odd width 3..17.
    LAmin3,LAmin5,LAmin7,LAmin9,LAmin11,LAmin13,LAmin15,LAmin17 = spike_detection(shaped_array_26,15) # w_17
    # Successive differences isolate each width's contribution.
    R3, R5, R7, R9 = comp_R(shaped_array,LAmin3),comp_R(LAmin3,LAmin5),comp_R(LAmin5,LAmin7),comp_R(LAmin7,LAmin9)
    R11, R13, R15, R17 = comp_R(LAmin9,LAmin11),comp_R(LAmin11,LAmin13),comp_R(LAmin13,LAmin15),comp_R(LAmin15,LAmin17)
    # Keep only peak values ...
    pos3, pos5,pos7,pos9 = Height(R3,3), Height(R5,5), Height(R7,7), Height(R9,9)
    pos11, pos13, pos15,pos17 = Height(R11,11), Height(R13,13), Height(R15,15), Height(R17,17)
    # ... then drop heights below 0.1.
    fh_3, fh_5,fh_7,fh_9 = filter_(pos3), filter_(pos5), filter_(pos7), filter_(pos9)
    fh_11, fh_13, fh_15,fh_17 = filter_(pos11), filter_(pos13), filter_(pos15), filter_(pos17)
    # Normalized per-width feature frames.
    c_feature3,NM_cluster_features3 = Nm_cluster_feature(shaped_array,3,fh_3,n)
    c_feature5,NM_cluster_features5 = Nm_cluster_feature(shaped_array,5,fh_5,n)
    c_feature7,NM_cluster_features7 = Nm_cluster_feature(shaped_array,7,fh_7,n)
    c_feature9,NM_cluster_features9 = Nm_cluster_feature(shaped_array,9,fh_9,n)
    c_feature11,NM_cluster_features11 = Nm_cluster_feature(shaped_array,11,fh_11,n)
    c_feature13,NM_cluster_features13 = Nm_cluster_feature(shaped_array,13,fh_13,n)
    c_feature15,NM_cluster_features15 = Nm_cluster_feature(shaped_array,15,fh_15,n)
    c_feature17,NM_cluster_features17 = Nm_cluster_feature(shaped_array,17,fh_17,n)
    frames = [c_feature3, c_feature5,c_feature7,c_feature9,\
              c_feature11,c_feature13,c_feature15,c_feature17]
    Nm_df_= pd.concat(frames)
    # Per-width (48 x days) spike-height grids, summed at the end.
    NM_clusterings_w3_h = NM_cluster_features3['Height'].to_numpy()
    heat_df_3 = filt_heat_map(NM_clusterings_w3_h)
    NM_clusterings_w5_h = NM_cluster_features5['Height'].to_numpy()
    heat_df_5 = filt_heat_map(NM_clusterings_w5_h)
    NM_clusterings_w7_h = NM_cluster_features7['Height'].to_numpy()
    heat_df_7 = filt_heat_map(NM_clusterings_w7_h)
    NM_clusterings_w9_h = NM_cluster_features9['Height'].to_numpy()
    heat_df_9 = filt_heat_map(NM_clusterings_w9_h)
    NM_clusterings_w11_h = NM_cluster_features11['Height'].to_numpy()
    heat_df_11 = filt_heat_map(NM_clusterings_w11_h)
    NM_clusterings_w13_h = NM_cluster_features13['Height'].to_numpy()
    heat_df_13 = filt_heat_map(NM_clusterings_w13_h)
    NM_clusterings_w15_h = NM_cluster_features15['Height'].to_numpy()
    heat_df_15 = filt_heat_map(NM_clusterings_w15_h)
    NM_clusterings_w17_h = NM_cluster_features17['Height'].to_numpy()
    heat_df_17 = filt_heat_map(NM_clusterings_w17_h)
    sum_spike = heat_df_3 + heat_df_5 +heat_df_7+heat_df_9 + \
                heat_df_11+heat_df_13+heat_df_15+heat_df_17
    return Nm_df_,sum_spike
def Nm_cluster_feature(shaped_array, w, X, n):
    """Build normalized clustering features for one detection width *w*.

    Parameters
    ----------
    shaped_array : sequence
        Flattened half-hourly usage (length 48 * days); only its length
        is used here.
    w : int
        Detection window width (3..17); becomes the 'resolution' feature.
    X : list
        Spike-height series aligned with shaped_array.
    n : float
        Minimum normalized height kept by filter_height.

    Returns
    -------
    tuple
        (non-zero-height rows, full feature DataFrame), each with columns
        Height / time_of_day / resolution.

    Fix: the mean of the positive spike heights was recomputed inside the
    per-sample loop (O(n^2) for a full season); it is loop-invariant and
    is now computed once before the loop.
    """
    # Half-hour slot label (1..48) for every sample of a full year.
    time_of_day_ = [i % 49 for i in range(1, 49)]
    time_of_day = time_of_day_ * 365
    resolution = []
    for_mean = []
    for i in range(len(shaped_array)):
        if X[i] > 0:
            for_mean.append(X[i])
        resolution.append(w)
    # Normalization constants: mean positive height (identity divisor when
    # there are no spikes), half a day for time, half the max width (17/2)
    # under a square root for resolution.
    mean = np.mean(for_mean) if for_mean else 1.0
    NM_height = []
    NM_resolution = []
    NM_time_of_day = []
    for i in range(len(shaped_array)):
        NM_height.append(X[i] / mean)
        NM_time_of_day.append(time_of_day[i] / (48 / 2))
        NM_resolution.append(np.sqrt(resolution[i] / (17 / 2)))
    NM_height = filter_height(NM_height, n)
    NM_cluster_features = pd.DataFrame({"Height": NM_height,
                                        "time_of_day": NM_time_of_day,
                                        "resolution": NM_resolution})
    NM_clusterings_w3 = non_zero(NM_cluster_features)
    return NM_clusterings_w3, NM_cluster_features
# Sanity check of the resolution normalization for w=3 (value unused).
np.sqrt(3/(17/2))
# Build seasonal frames for house 60 and extract per-season spike features.
house_array = extract_house2(60)
df = data_format(house_array)
df = df.fillna(0)  # fix: fillna is not in-place; the result was being discarded
# Columns are days of the year; Dec + Jan/Feb form the "summer" block.
december = df.iloc[:, 334:]
Jan_Fe = df.iloc[:, :59]
Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
Autumn_df = df.iloc[:, 59:151]
Winter_df = df.iloc[:, 151:243]
Spring_df = df.iloc[:, 243:334]
Summer_shaped_array = reshaping(Summer_df)
summer_pos = show_pos(Summer_shaped_array)
summer_spike_df, summer_sum_spike = Generate_clustering_features(summer_pos, 1)
Autumn_shaped_array = reshaping(Autumn_df)
Autumn_pos = show_pos(Autumn_shaped_array)
Autumn_spike_df, sum_spike = Generate_clustering_features(Autumn_pos, 1)
Winter_shaped_array = reshaping(Winter_df)
Winter_pos = show_pos(Winter_shaped_array)
Winter_spike_df, sum_spike = Generate_clustering_features(Winter_pos, 1)
Spring_shaped_array = reshaping(Spring_df)
Spring_pos = show_pos(Spring_shaped_array)
Spring_spike_df, sum_spike = Generate_clustering_features(Spring_pos, 1)
centroid_summer = GMM_3d(summer_spike_df, 4)
def generate_heatmap(DB_df,sum_spike,c):
    """For each of the *c* clusters in DB_df, rebuild a (48 x 365) grid of
    spike heights and display it as a heat map.

    DB_df needs 'clusters', 'time_of_day' and 'Height' columns;
    sum_spike is the per-slot spike-height frame used to look up the day
    on which each spike occurred.  Relies on the module-level df_test
    template grid.
    """
    sum_spike.index = np.arange(1, len(sum_spike)+1)
    for n in range(c):
        test_df = df_test.copy()
        rows = range_heat(DB_df,n) # n = cluster number
        clus_no = DB_df.loc[DB_df['clusters'] == n]
        # NOTE(review): assigning into a .loc slice can trigger pandas'
        # SettingWithCopyWarning; clus_no may be a view of DB_df.
        clus_no['time'] = rows
        times_day = []
        k = 0
        for i in clus_no.time:
            # First day whose spike height at slot i equals this row's Height.
            j = clus_no.Height.iloc[k]
            time = (sum_spike.loc[i] == j).idxmax()
            times_day.append(time)
            k = k+1
        clus_no['day'] = times_day
        for i in range(len(times_day)): #index, column
            indx = clus_no.time.iloc[i]
            col = clus_no.day.iloc[i]
            height = clus_no.Height.iloc[i]
            test_df.loc[indx,col] = height
        print('cluster',n)
        colormap(test_df)
def seasonal_spike_time_height(house_num,n):
    """Fuzzy c-means clustering (time_of_day vs Height) for each season of
    one house; plots the four seasons side by side and returns the list
    of the four per-season cluster-center arrays.
    """
    print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)
    # NOTE(review): fillna is not in-place and the result is discarded;
    # probably intended df = df.fillna(0).
    df.fillna(0)
    # Season split over day-of-year columns (Dec + Jan/Feb = "summer").
    december = df.iloc[:,334:]
    Jan_Fe = df.iloc[:,:59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:,59:151]
    Winter_df = df.iloc[:,151:243]
    Spring_df = df.iloc[:,243:334]
    # Per-season spike-feature extraction.
    Summer_shaped_array = reshaping(Summer_df)
    summer_pos = show_pos(Summer_shaped_array)
    summer_spike_df,sum_spike = Generate_clustering_features(summer_pos,1)
    Autumn_shaped_array = reshaping(Autumn_df)
    Autumn_pos = show_pos(Autumn_shaped_array)
    Autumn_spike_df,sum_spike = Generate_clustering_features(Autumn_pos,1)
    Winter_shaped_array = reshaping(Winter_df)
    Winter_pos = show_pos(Winter_shaped_array)
    Winter_spike_df,sum_spike = Generate_clustering_features(Winter_pos,1)
    Spring_shaped_array = reshaping(Spring_df)
    Spring_pos = show_pos(Spring_shaped_array)
    Spring_spike_df,sum_spike = Generate_clustering_features(Spring_pos,1)
    fig, axes = plt.subplots(1,4,squeeze=False,figsize = (35,6))
    seasons2 = [summer_spike_df,Autumn_spike_df,Winter_spike_df,Spring_spike_df]
    colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']
    ncenters = n
    centroid = []
    for df, ax in zip(seasons2, axes.ravel()):
        # Fuzzy c-means on (Height, time_of_day, resolution); fuzzifier m=3.
        alldata = np.vstack((df['Height'], df['time_of_day'],df['resolution']))
        cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
            alldata, ncenters, 3, error=0.005, maxiter=1000, init=None)
        fpcs = []
        fpcs.append(fpc)
        # Mark cluster centers: rows of alldata are (Height, time, resolution),
        # so pt[1] is time (x) and pt[0] is height (y).
        for pt in cntr:
            ax.plot(pt[1], pt[0], 'rs')
        centroid.append(cntr)
        # Plot assigned clusters, for each data point in training set
        cluster_membership = np.argmax(u, axis=0)
        for j in range(ncenters):
            ax.plot(df['time_of_day'][cluster_membership == j],
                    df['Height'][cluster_membership == j],
                    '.',
                    color=colors[j])
        ax.set_title('Time vs Height, Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
        ax.axis('on')
        ax.set_ylim([0,6])
        ax.grid()
        ax.set_xlabel('time_of_day')
        ax.set_ylabel('Height')
    # fig.tight_layout()
    # svg_name = "cluster"+house_num
    # fig.savefig(svg_name,'.png')
    return centroid
def seasonal_spike_height_duration(house_num,n):
    """Fuzzy c-means clustering (duration/resolution vs Height) per season
    for one house; plots four panels and returns the per-season centers.

    NOTE(review): unlike seasonal_spike_time_height, NaNs are not filled
    here at all — confirm whether that is intentional.
    """
    print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)
    december = df.iloc[:,334:]
    Jan_Fe = df.iloc[:,:59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:,59:151]
    Winter_df = df.iloc[:,151:243]
    Spring_df = df.iloc[:,243:334]
    Summer_shaped_array = reshaping(Summer_df)
    summer_pos = show_pos(Summer_shaped_array)
    summer_spike_df,sum_spike = Generate_clustering_features(summer_pos,1)
    Autumn_shaped_array = reshaping(Autumn_df)
    Autumn_pos = show_pos(Autumn_shaped_array)
    Autumn_spike_df,sum_spike = Generate_clustering_features(Autumn_pos,1)
    Winter_shaped_array = reshaping(Winter_df)
    Winter_pos = show_pos(Winter_shaped_array)
    Winter_spike_df,sum_spike = Generate_clustering_features(Winter_pos,1)
    Spring_shaped_array = reshaping(Spring_df)
    Spring_pos = show_pos(Spring_shaped_array)
    Spring_spike_df,sum_spike = Generate_clustering_features(Spring_pos,1)
    fig, axes = plt.subplots(1,4,squeeze=False,figsize = (35,6))
    seasons2 = [summer_spike_df,Autumn_spike_df,Winter_spike_df,Spring_spike_df]
    #seasons_name = ["Summer","Autumn","Winter","Spring"]
    colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']
    ncenters = n
    centroid = []
    for df, ax in zip(seasons2, axes.ravel()):
        alldata = np.vstack((df['Height'], df['time_of_day'],df['resolution']))
        cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
            alldata, ncenters, 3, error=0.005, maxiter=1000, init=None)
        fpcs = []
        fpcs.append(fpc)
        # Centers: rows are (Height, time, resolution) → x=resolution, y=Height.
        for pt in cntr:
            ax.plot(pt[2], pt[0], 'rs')
        centroid.append(cntr)
        # Plot assigned clusters, for each data point in training set
        cluster_membership = np.argmax(u, axis=0)
        #print("HOUSE NO: ", house_num)
        #ax = Axes3D(fig)
        for j in range(ncenters):
            ax.plot(df['resolution'][cluster_membership == j],
                    df['Height'][cluster_membership == j],
                    #df['resolution'][cluster_membership == j],
                    #rincipalDf['principal component 2'][cluster_membership == j],
                    '.',
                    color=colors[j])
        ax.set_title('Duration vs Height, Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
        ax.axis('on')
        ax.set_ylim([0,6.5])
        ax.set_xlim([0,2])
        ax.grid()
        ax.set_xlabel('resolution')
        ax.set_ylabel('Height')
    # fig.tight_layout()
    # svg_name = "cluster"+house_num
    # fig.savefig(svg_name,'.png')
    return centroid
def seasonal_spike_time_duration(house_num,n):
    """Fuzzy c-means clustering (time_of_day vs duration/resolution) per
    season for one house; plots four panels and returns the centers.
    """
    print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)
    december = df.iloc[:,334:]
    Jan_Fe = df.iloc[:,:59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:,59:151]
    Winter_df = df.iloc[:,151:243]
    Spring_df = df.iloc[:,243:334]
    Summer_shaped_array = reshaping(Summer_df)
    summer_pos = show_pos(Summer_shaped_array)
    summer_spike_df,sum_spike = Generate_clustering_features(summer_pos,1)
    Autumn_shaped_array = reshaping(Autumn_df)
    Autumn_pos = show_pos(Autumn_shaped_array)
    Autumn_spike_df,sum_spike = Generate_clustering_features(Autumn_pos,1)
    Winter_shaped_array = reshaping(Winter_df)
    Winter_pos = show_pos(Winter_shaped_array)
    Winter_spike_df,sum_spike = Generate_clustering_features(Winter_pos,1)
    Spring_shaped_array = reshaping(Spring_df)
    Spring_pos = show_pos(Spring_shaped_array)
    Spring_spike_df,sum_spike = Generate_clustering_features(Spring_pos,1)
    fig, axes = plt.subplots(1,4,squeeze=False,figsize = (35,6))
    seasons2 = [summer_spike_df,Autumn_spike_df,Winter_spike_df,Spring_spike_df]
    #seasons_name = ["Summer","Autumn","Winter","Spring"]
    colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']
    ncenters = n
    centroid = []
    for df, ax in zip(seasons2, axes.ravel()):
        alldata = np.vstack((df['Height'], df['time_of_day'],df['resolution']))
        cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
            alldata, ncenters, 3, error=0.005, maxiter=1000, init=None)
        fpcs = []
        fpcs.append(fpc)
        # Centers: rows are (Height, time, resolution) → x=time, y=resolution.
        for pt in cntr:
            ax.plot(pt[1], pt[2], 'rs')
        centroid.append(cntr)
        # Plot assigned clusters, for each data point in training set
        cluster_membership = np.argmax(u, axis=0)
        #print("HOUSE NO: ", house_num)
        #ax = Axes3D(fig)
        for j in range(ncenters):
            ax.plot(df['time_of_day'][cluster_membership == j],
                    df['resolution'][cluster_membership == j],
                    #df['resolution'][cluster_membership == j],
                    #rincipalDf['principal component 2'][cluster_membership == j],
                    '.',
                    color=colors[j])
        ax.set_title('Time VS Duration, Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
        ax.axis('on')
        ax.set_ylim([0,6.5])
        ax.set_xlim([0,2])
        ax.grid()
        ax.set_xlabel('time_of_day')
        ax.set_ylabel('resolution')
    # fig.tight_layout()
    # svg_name = "cluster"+house_num
    # fig.savefig(svg_name,'.png')
    return centroid
def seasonal_spike_3d(house_num,n):
    """3-D fuzzy c-means scatter plots for each season of one house.

    NOTE(review): re-defined later in this file with a GMM-based body;
    that later definition is the one in effect at import time.
    """
    print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)
    december = df.iloc[:,334:]
    Jan_Fe = df.iloc[:,:59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:,59:151]
    Winter_df = df.iloc[:,151:243]
    Spring_df = df.iloc[:,243:334]
    Summer_shaped_array = reshaping(Summer_df)
    summer_pos = show_pos(Summer_shaped_array)
    summer_spike_df,sum_spike = Generate_clustering_features(summer_pos,1)
    Autumn_shaped_array = reshaping(Autumn_df)
    Autumn_pos = show_pos(Autumn_shaped_array)
    Autumn_spike_df,sum_spike = Generate_clustering_features(Autumn_pos,1)
    Winter_shaped_array = reshaping(Winter_df)
    Winter_pos = show_pos(Winter_shaped_array)
    Winter_spike_df,sum_spike = Generate_clustering_features(Winter_pos,1)
    Spring_shaped_array = reshaping(Spring_df)
    Spring_pos = show_pos(Spring_shaped_array)
    Spring_spike_df,sum_spike = Generate_clustering_features(Spring_pos,1)
    seasons = [summer_spike_df,Autumn_spike_df,Winter_spike_df,Spring_spike_df]
    print("Summer")
    scat_plot_3d(summer_spike_df,n)
    print("Autumn")
    scat_plot_3d(Autumn_spike_df,n)
    print("Winter")
    scat_plot_3d(Winter_spike_df,n)
    print("Spring")
    scat_plot_3d(Spring_spike_df,n)
def scat_plot_3d(df,ncenters):
    """3-D fuzzy c-means scatter (time_of_day, resolution, Height) of one
    season's spike-feature frame.  Plots only; returns None.
    """
    fig =plt.figure(figsize = (20,6))
    ax = fig.add_subplot(111, projection = '3d')
    colors = ['b', 'orange', 'g', 'r', 'c', 'm', 'y', 'k', 'Brown', 'ForestGreen']
    # Rows of alldata here are (time, Height, resolution) — a different
    # order from the 2-D plot helpers.
    alldata = np.vstack((df['time_of_day'], df['Height'],df['resolution']))
    cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(
        alldata, ncenters, 3, error=0.005, maxiter=1000, init=None)
    fpcs = []
    fpcs.append(fpc)
    # Centers: pt[0]=time (x), pt[2]=resolution (y), pt[1]=Height (z).
    for pt in cntr:
        ax.scatter(pt[0], pt[2],pt[1],color='r')
    cluster_membership = np.argmax(u, axis=0)
    for j in range(ncenters):
        ax.scatter(df['time_of_day'][cluster_membership == j],
                   df['resolution'][cluster_membership == j],
                   df['Height'][cluster_membership == j],
                   '.',
                   color=colors[j], edgecolor='k',marker='o', s=20, lw=0)
    ax.set_title('Centers = {0}; FPC = {1:.2f}'.format(ncenters, fpc))
    #fuzzy partition coefficient
    ax.set_xlabel('time_of_day')
    ax.set_ylabel('resolution')
    ax.set_zlabel('Height')
    ax.set_ylim([0,2])
    ax.axis('on')
def scatter_2d (houses_array,n):
    """Run all three 2-D seasonal cluster plots for every house id and
    return a tidy DataFrame of centroids.

    Only the centers from seasonal_spike_time_height are collected; the
    other two calls are plot-only.
    """
    centroid = []
    for i in houses_array:
        c = seasonal_spike_time_height(i,n)
        seasonal_spike_height_duration(i,n)
        seasonal_spike_time_duration(i,n)
        centroid.append(c)
    centroid = centroid_df(centroid)
    return centroid
def centroid_df(centroid):
    """Flatten nested per-house, per-season c-means centers into a
    (season, cluster)-indexed DataFrame with Height/Time/Duration columns.

    NOTE(review): the inner loop keeps only the first three of the four
    per-house season arrays — confirm whether the fourth season is meant
    to be dropped.
    """
    centroid = np.around(centroid, 3)
    new_cen = []
    for i in centroid:
        for j in range(3):
            new_cen.append(i[j])
    new_cen = np.array(new_cen)
    A = new_cen
    # One row per cluster center: stack the per-season center arrays.
    df = pd.DataFrame([list(l) for l in A]).stack().apply(pd.Series).reset_index(1, drop=True)
    df.columns = ['Height', 'Time of a day', 'Duration']
    df.index.names = ['house']
    # Re-label rows with a (season, cluster) MultiIndex, cycling season
    # names by row position.
    names = ['Summer', 'Autumn', 'Winter', 'Spring']
    arr = np.asarray(names)
    df.index = pd.MultiIndex.from_arrays([arr[df.index % len(names)], df.index],
                                         names=['season','cluster'])
    return df
# Run 3-center fuzzy c-means plots for a hand-picked set of houses,
# saving the figures to a PDF and the centroids to Excel.  (This cell
# was collapsed onto a single line — a syntax error in plain Python;
# reformatted.)
houses_array = [2332, 89, 306, 4, 2551]
with PdfPages('cluster3.pdf') as pdf:
    print("cluster 3")
    centroid_3 = scatter_2d(houses_array, 3)
    pdf.savefig()
    plt.close()
centroid_3.to_excel("centroids3.xlsx")
# Quick look at house 2's yearly usage distribution (notebook cell;
# the bare .describe() expression relies on notebook display).
house_array = extract_house2(2)
df = data_format(house_array)
df = df.fillna(0)
df.describe()
# Rebuild house 2's frame and split it into seasonal column ranges
# (columns are days of the year; Dec + Jan/Feb form the summer block).
house_array = extract_house2(2)
df = data_format(house_array)
df = df.fillna(0)
december = df.iloc[:,334:]
Jan_Fe = df.iloc[:,:59]
Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
Autumn_df = df.iloc[:,59:151]
Winter_df = df.iloc[:,151:243]
Spring_df = df.iloc[:,243:334]
def seasonal_spike_3d(house_num,n):
    """Per-season GMM centroids for house *house_num* with *n* components.

    Splits the year into Summer (Dec + Jan/Feb), Autumn, Winter and
    Spring column ranges, extracts spike features for each season, runs
    GMM_3d on each, and returns the four centroid tables concatenated
    column-wise, aligned on the summer cluster index.

    Fix: pd.concat's ``join_axes`` keyword was deprecated in pandas 0.25
    and removed in 1.0; the documented replacement is concat followed by
    ``.reindex`` on the desired index.
    """
    #print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)
    df = df.fillna(0)
    december = df.iloc[:,334:]
    Jan_Fe = df.iloc[:,:59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:,59:151]
    Winter_df = df.iloc[:,151:243]
    Spring_df = df.iloc[:,243:334]
    Summer_shaped_array = reshaping(Summer_df)
    summer_pos = show_pos(Summer_shaped_array)
    summer_spike_df,sum_spike = Generate_clustering_features(summer_pos,1)
    Autumn_shaped_array = reshaping(Autumn_df)
    Autumn_pos = show_pos(Autumn_shaped_array)
    Autumn_spike_df,sum_spike = Generate_clustering_features(Autumn_pos,1)
    Winter_shaped_array = reshaping(Winter_df)
    Winter_pos = show_pos(Winter_shaped_array)
    Winter_spike_df,sum_spike = Generate_clustering_features(Winter_pos,1)
    Spring_shaped_array = reshaping(Spring_df)
    Spring_pos = show_pos(Spring_shaped_array)
    Spring_spike_df,sum_spike = Generate_clustering_features(Spring_pos,1)
    seasons = [summer_spike_df,Autumn_spike_df,Winter_spike_df,Spring_spike_df]
    #print("Summer")
    centroid_summer = GMM_3d(summer_spike_df,n)
    #print("Autumn")
    centroid_autumn = GMM_3d(Autumn_spike_df,n)
    #print("Winter")
    centroid_winter = GMM_3d(Winter_spike_df,n)
    #print("Spring")
    centroid_spring = GMM_3d(Spring_spike_df,n)
    result = pd.concat([centroid_summer, centroid_autumn, centroid_winter,
                        centroid_spring], axis=1).reindex(centroid_summer.index)
    return result
def GMM_cluster(X, n, df1):
    """Fit an n-component tied-covariance GMM to X, label df1's rows in a
    new 'clusters' column (mutates df1), and return the per-cluster
    median of Height / time_of_day / resolution.
    """
    model = GMM(n_components=n, covariance_type='tied', random_state=42).fit(X)
    df1['clusters'] = model.predict(X)
    feature_cols = ['Height', 'time_of_day', 'resolution', 'clusters']
    return df1[feature_cols].groupby(['clusters']).median()
# Scratch check of the centroid-count fallback logic used by GMM_3d.
n = 0
if 1 <= n < 4:
    centroid = [1, 2, 3]
elif n >= 4:
    centroid = [4, 4, 4]
else:
    centroid = [0, 0, 0]
# GMM
def GMM_3d(Nm_df,n):
    """GMM centroids with fallbacks for tiny inputs.

    Uses as many components as rows when there are fewer than 4 rows,
    and a single all-zero placeholder centroid when the input is empty.
    NOTE(review): re-defined several times below (flattened-notebook
    artifact); the final definition is the one in effect at import time.
    """
    df1 = Nm_df.copy()
    df1 = df1.fillna(0)
    X = df1.iloc[:].values
    if len(X) < 4 and len(X) >= 1 :
        centroid = GMM_cluster(X,len(X),df1)
    elif len(X) >= 4:
        centroid = GMM_cluster(X,n,df1)
    else:
        # Empty input: single all-zero placeholder row.
        d = {'Height': [0], 'time_of_day': [0], 'resolution':0}
        d_test = pd.DataFrame(data=d)
        d_test.index.name = 'clusters'
        centroid = d_test
    return centroid
# GMM: real
def GMM_3d(Nm_df,n):
    """GMM clustering returning per-cluster means.

    NOTE(review): shadowed by later re-definitions.  Also: the final
    ``else`` branch is unreachable (len(X) < 4 already covers 0) and
    assigns a misspelled name ``cnetroid``; with an empty X the first
    branch would pass n_components=0 to GaussianMixture, which raises.
    """
    df1 = Nm_df.copy()
    df1 = df1.fillna(0)
    X = df1.iloc[:].values
    if len(X) < 4:
        gmm = GMM(n_components=len(X),covariance_type='tied',random_state=42).fit(X)
    elif len(X) >= 4:
        gmm = GMM(n_components=n,covariance_type='tied',random_state=42).fit(X)
    else:
        cnetroid = [0,0,0]
    cluster_labels = gmm.predict(X)
    clmns = ['Height', 'time_of_day','resolution']
    #Glue back to originaal data
    df1['clusters'] = cluster_labels
    #Add the column into our list
    clmns.extend(['clusters'])
    centroid = df1[clmns].groupby(['clusters']).mean()
    #print (centroid)
    # silhouette_avg = silhouette_score(X, cluster_labels)
    # print("For n_clusters =", n,
    #       "The average silhouette_score is :", silhouette_avg)
    # fig, (ax1, ax2) = plt.subplots(1, 2)
    # fig.set_size_inches(18, 7)
    # ax = Axes3D(fig)
    # colors = cm.nipy_spectral(cluster_labels.astype(float) / n)
    # ax.scatter(df1['time_of_day'], df1['resolution'], df1['Height'],marker='o', s=20, lw=0, alpha=0.7,
    #            c=colors, edgecolor='k') # x,z,y
    # for pt in centroid:
    #     ax.scatter(centroid["time_of_day"], centroid["resolution"],centroid["Height"],color='r')
    # plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
    #               "with n_clusters = %d" % n),
    #              fontsize=14, fontweight='bold')
    #plt.show()
    return centroid
# GMM
def GMM_3d(Nm_df,n):
    """GMM clustering returning per-cluster means (no small-input guard).

    NOTE(review): shadowed by the final re-definition below, which is
    identical except that it returns per-cluster medians.
    """
    df1 = Nm_df.copy()
    df1 = df1.fillna(0)
    X = df1.iloc[:].values
    gmm = GMM(n_components=n,covariance_type='tied',random_state=42).fit(X)
    cluster_labels = gmm.predict(X)
    clmns = ['Height', 'time_of_day','resolution']
    #Glue back to originaal data
    df1['clusters'] = cluster_labels
    #Add the column into our list
    clmns.extend(['clusters'])
    centroid = df1[clmns].groupby(['clusters']).mean()
    #print (centroid)
    # silhouette_avg = silhouette_score(X, cluster_labels)
    # print("For n_clusters =", n,
    #       "The average silhouette_score is :", silhouette_avg)
    # fig, (ax1, ax2) = plt.subplots(1, 2)
    # fig.set_size_inches(18, 7)
    # ax = Axes3D(fig)
    # colors = cm.nipy_spectral(cluster_labels.astype(float) / n)
    # ax.scatter(df1['time_of_day'], df1['resolution'], df1['Height'],marker='o', s=20, lw=0, alpha=0.7,
    #            c=colors, edgecolor='k') # x,z,y
    # for pt in centroid:
    #     ax.scatter(centroid["time_of_day"], centroid["resolution"],centroid["Height"],color='r')
    # plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
    #               "with n_clusters = %d" % n),
    #              fontsize=14, fontweight='bold')
    #plt.show()
    return centroid
# GMM
def GMM_3d(Nm_df, n):
    """Cluster the feature frame with an n-component tied-covariance GMM
    and return the per-cluster median centroids.

    This is the definition in effect at import time (earlier versions
    above are shadowed).  Its body duplicated GMM_cluster's fit / label /
    median-groupby sequence line for line, so it now delegates to it;
    the large block of commented-out silhouette/plot scratch code has
    been removed.
    """
    df1 = Nm_df.copy()
    df1 = df1.fillna(0)
    X = df1.iloc[:].values
    return GMM_cluster(X, n, df1)
#Houses_array = [2,4]
# Cluster every qualifying house (4 GMM components per season) and
# collect the per-house centroid tables.
Controid_results = {}
for i in Houses_array:
    #colormap_house(i)
    c = seasonal_spike_3d(i,4)
    Controid_results[i] = c
    #Controid_results.append(c)
# Stack the per-house tables; the house id becomes the outer row index.
Controid_results = pd.concat(Controid_results, axis=0)
Controid_results_s = Controid_results.copy()
#col = [0,1,2,3]
# Controid_results = pd.concat(Controid_results, axis=0)
# Season x feature column MultiIndex for readability.
df_multiindex = pd.MultiIndex.from_product(
    (('Summer', 'Autumn', 'Winter', 'Spring'),
     ('Height', 'time_of_day', 'resolution')))
# df_multiindex_row = pd.MultiIndex.from_product(
#     (h_test,
#     col)
#     )
Controid_results_s.columns = df_multiindex
#Controid_results.index = df_multiindex_row
Controid_results_s  # notebook display
median_cluster = Controid_results_s.copy()
median_cluster = median_cluster.sort_values([('Summer', 'time_of_day')], ascending=False)
median_cluster  # notebook display
Controid_results_s.to_excel("Controid_results_Median.xlsx")
# NOTE(review): these lookups hit the pre-MultiIndex copy, whose column
# names repeat once per season, so each selection returns four columns.
Heights = Controid_results['Height']
time_of_days = Controid_results['time_of_day']
resolutions = Controid_results['resolution']
time_of_days  # notebook display
def convert_resolution(x):
    """Invert the sqrt(w / 8.5) normalization: recover the window width."""
    return round(x * x * (17 / 2))
# Column positions of the time (conver_t) and resolution (conver_r)
# fields inside each season's 3-column group of the centroid table.
conver_t = [1,4,7,10]
conver_r = [2,5,8,11]
def conver_time(x):
    """Invert the /24 time normalization back to a half-hour slot index."""
    return int(x * (48 / 2))
# Denormalize the centroid table: convert the time_of_day and resolution
# columns back to slot indices / window widths, then export to Excel.
test_cen = Controid_results_s.copy()
test_cen = test_cen.fillna(0)
test_cen_ori = test_cen.copy()
for i in conver_t:
    converted_t = []
    for j in test_cen.iloc[:,i]:
        repl = conver_time(j)
        converted_t.append(repl)
    test_cen.iloc[:,i] = converted_t
for i in conver_r:
    converted_r = []
    for j in test_cen.iloc[:,i]:
        repl = convert_resolution(j)
        converted_r.append(repl)
    test_cen.iloc[:,i] = converted_r
test_cen.to_excel("Controid_results_Median_converted.xlsx")
Controid_results.head()  # notebook display
test_cen  # notebook display
# NOTE(review): df1 is a local name inside the GMM functions and is not
# defined at module level in a top-to-bottom run — notebook leftover.
df1
def median_3d (df,n):
    """3-D scatter of cluster *n*'s centroids (level-1 index cross-section)
    across all houses.  Plots only; returns None.
    """
    df = df.xs(n, level=1, drop_level=False)
    fig =plt.figure(figsize = (20,6))
    df1=df.copy()
    ax = Axes3D(fig)
    ax.scatter(df1['time_of_day'], df1['resolution'], df1['Height'],marker='o', s=60, lw=0, alpha=0.7,
               edgecolor='k') # x,z,y
    ax.set_ylim([0,14])
    ax.set_zlim([0,6])
    # NOTE(review): suptitle is given a tuple, which matplotlib will
    # stringify verbatim — probably intended a single formatted string.
    plt.suptitle(("Scatter plot of cluster %d" % n, "for 100 houses"),
                 fontsize=14, fontweight='bold')
Controid_results_s["Winter"]  # notebook display
# NOTE(review): winter_df is only assigned further down in the file; in
# a straight top-to-bottom run this line raises NameError.
h = np.array(winter_df['time_of_day'].values)
# Stand-alone matplotlib 3-D scatter demo with dummy data.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
x =[1,2,3,4,5,6,7,8,9,10]
y =[5,6,2,3,13,4,1,2,4,8]
z =[2,3,3,3,5,7,9,11,9,10]
ax.scatter(x, y, z, c='r', marker='o')
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()
# Interactive plotly 3-D scatter of the winter centroids.
winter_df = test_cen["Winter"]
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
# Fix: IPython magics are not valid Python syntax in a plain .py module
# (they caused a SyntaxError here); kept as comments — run them in the
# notebook if needed.
# %matplotlib inline
# %matplotlib notebook
# %pylab
time = winter_df['time_of_day']
resolution = winter_df['resolution']
height = winter_df['Height']
trace1 = go.Scatter3d(
    x=time,
    y=resolution,
    z=height,
    mode='markers',
    marker=dict(
        size=5,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.9
        ),
        opacity=0.8
    )
)
data = [trace1]
# NOTE(review): this layout is built but never passed to go.Figure.
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data)
py.iplot(fig, filename='simple-3d-scatter')
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
# NOTE(review): the plotly.plotly module moved to the separate
# chart_studio package in plotly 4 — confirm the installed version.
# SECURITY(review): a live username/API key is hard-coded below; it
# should be revoked and loaded from the environment instead of being
# committed to source control.
plotly.tools.set_credentials_file(username='selee19', api_key='1IhNevlJpE6EC8CRtpmp')
# Interactive 3-D scatter of the winter centroids (time, resolution, height).
time = winter_df['time_of_day']
resolution = winter_df['resolution']
height = winter_df['Height']
trace1 = go.Scatter3d(
    x=time,
    y=resolution,
    z=height,
    mode='markers',
    marker=dict(
        size=5,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.9
        ),
        opacity=0.8
    )
)
data = [trace1]
# NOTE(review): this layout is built but never passed to go.Figure.
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data)
py.iplot(fig, filename='simple-3d-scatter')
# Inspect the Winter slice, then regenerate the per-season 3-D median scatters.
winter_df['resolution']
winter_df.head()

Controid_results_s = test_cen.copy()

winter_df = Controid_results_s["Winter"]
print("Winter")
for k in range(4):
    median_3d(winter_df, k)

Spring_df = Controid_results_s["Spring"]
print("Spring")
for k in range(4):
    median_3d(Spring_df, k)

Summer_df = Controid_results_s["Summer"]
print("Summer")
for k in range(4):
    median_3d(Summer_df, k)

Autumn_df = Controid_results_s["Autumn"]
print("Autumn")
for k in range(4):
    median_3d(Autumn_df, k)

# Pull out individual clusters (index level 1 is the cluster id).
clust_0 = Controid_results_s.xs(0, level=1, drop_level=False)
clust_3 = test_cen.reset_index(level=0, drop=True).loc[3]
clust_0
# NOTE(review): clust_0_2 is defined in another (unseen) cell.
clust_0_2.sort_values(by=['Height'])
clust_0_2
def _cluster_scatter3d(frame, title, xcol='time_of_day', ycol='resolution',
                       ylim=(0, 14), zlim=(0, 6)):
    """Render one 3-D Height scatter for a cluster frame.

    frame -- DataFrame with 'time_of_day', 'resolution', 'Height' columns.
    title -- figure suptitle text.
    xcol/ycol -- which feature goes on which horizontal axis.
    ylim/zlim -- axis limits (y varies between slot-count and hour scales).
    """
    fig = plt.figure(figsize=(20, 6))
    ax = Axes3D(fig)
    ax.scatter(frame[xcol], frame[ycol], frame['Height'],
               marker='o', s=20, lw=0, alpha=0.7, edgecolor='k')  # x,z,y
    ax.set_ylim(list(ylim))
    ax.set_zlim(list(zlim))
    plt.suptitle(title, fontsize=14, fontweight='bold')

# clust_*_2 frames come from other (unseen) notebook cells.
_cluster_scatter3d(clust_0_2.copy(), "Scatter plot of cluster 0 for 100 houses")
_cluster_scatter3d(clust_1_2.copy(), "Scatter plot of cluster 1 for 100 houses")
# Bug fix: the original title said "cluster 0" but the data plotted is clust_2_2.
_cluster_scatter3d(clust_2_2.copy(), "Scatter plot of cluster 2 for 100 houses",
                   xcol='resolution', ycol='time_of_day', ylim=(0, 48))
_cluster_scatter3d(clust_2_2.copy(), "Scatter plot of cluster 2 for 100 houses")
# Bug fix: the original title concatenated "with n_clusters = %d" % n where `n`
# is undefined at module level (NameError); the n_clusters suffix is dropped.
_cluster_scatter3d(clust_3.copy(), "Scatter plot of cluster 3 for 100 houses",
                   xcol='resolution', ycol='time_of_day', ylim=(0, 48), zlim=(0, 7))
_cluster_scatter3d(clust_3.copy(), "Scatter plot of cluster 3 for 100 houses")
# Preserve the module-level df1 binding the original left behind.
df1 = clust_3.copy()
# Notebook-style inspection of the full centroid table.
Controid_results_s
# Slice per-season sub-frames (top column level is the season name -- TODO confirm).
Winter = Controid_results_s['Winter']
Spring = Controid_results_s['Spring']
# Peek at a single cell, then display the Spring frame.
Spring.at[2,'Height']
Spring
def mean_centroid(df):
    """Return the column means (Height, time_of_day, resolution) of *df*."""
    means = [np.mean(df[col]) for col in ("Height", "time_of_day", "resolution")]
    return means[0], means[1], means[2]
# Mean centroid per season.  NOTE(review): Summer/Autumn are defined in other
# (unseen) notebook cells; only Winter/Spring are assigned in this chunk.
mean_centroid(Summer)
mean_centroid(Winter)
mean_centroid(Autumn)
mean_centroid(Spring)

# Ad-hoc inspection; `time_of_days` and `x` also come from earlier cells.
Autumn.iloc[:, 0]
time_of_days.iloc[:, 0]
for item in x:
    print(item)

# Overall feature means across all centroids.
np.mean(Controid_results['Height'])
np.mean(Controid_results['time_of_day'])
np.mean(Controid_results['resolution'])
import glob

# Collect every Excel file in the working directory into one side-by-side frame.
# Bug fixes: the original line fused three statements onto one line (SyntaxError),
# called `pandas.read_excel` although the module is imported as `pd`, and never
# imported `glob`.
appended_data = []
for infile in glob.glob("*.xlsx"):
    data = pd.read_excel(infile)
    # store DataFrame in list
    appended_data.append(data)
appended_data = pd.concat(appended_data, axis=1)
appended_data.to_excel('appended.xlsx')
# Run the 3-D seasonal spike clustering for each listed house and stack the
# returned centroid frames into one DataFrame.
houses_array = [73]
Controid_results = []
for house_id in houses_array:
    # colormap_house(house_id)
    Controid_results.append(seasonal_spike_3d(house_id, 4))
Controid_results = pd.concat(Controid_results, axis=0)
def seasonal_spike_2d(house_num, n):
    """Cluster one house's spike features per season and show 2-D scatters.

    house_num -- house id, extracted via extract_house2.
    n         -- number of GMM components forwarded to GMM_2d_scatter.

    NOTE(review): Dec+Jan/Feb are labelled "Summer" (southern-hemisphere
    seasons, presumably) -- confirm against the data's origin.
    """
    print("HOUSE NO: ", house_num)
    house_array = extract_house2(house_num)
    df = data_format(house_array)

    # Day-of-year column slices; summer wraps around the year boundary.
    december = df.iloc[:, 334:]
    Jan_Fe = df.iloc[:, :59]
    Summer_df = pd.concat([december, Jan_Fe], axis=1, sort=False)
    Autumn_df = df.iloc[:, 59:151]
    Winter_df = df.iloc[:, 151:243]
    Spring_df = df.iloc[:, 243:334]

    def _season_features(season_df):
        # reshape -> clamp negatives to zero -> spike feature table
        shaped = reshaping(season_df)
        positive = show_pos(shaped)
        spike_df, _ = Generate_clustering_features(positive, 1)
        return spike_df

    # The original repeated this pipeline four times verbatim; the unused
    # `seasons` list and the repeatedly clobbered `sum_spike` were dropped.
    for label, season_df in (("Summer", Summer_df), ("Autumn", Autumn_df),
                             ("Winter", Winter_df), ("Spring", Spring_df)):
        print(label)
        GMM_2d_scatter(_season_features(season_df), n)
def GMM_2d_scatter(Nm_df_G, n):
    """Fit an n-component tied-covariance GMM to the spike features and
    scatter time_of_day vs Height coloured by predicted cluster.

    Nm_df_G -- DataFrame of spike features (must include 'time_of_day', 'Height').
    n       -- number of Gaussian mixture components.
    """
    X = Nm_df_G.copy()
    gmm = GMM(n_components=n, covariance_type='tied', random_state=42).fit(X)
    labels = gmm.predict(X)
    clmns = ['Height', 'time_of_day', 'resolution']
    # Glue the predicted labels back onto the original data
    X['clusters'] = labels
    # Add the column into our list
    clmns.extend(['clusters'])
    # print(df[clmns].groupby(['clusters']).mean())
    # Fix: seaborn >= 0.12 removed positional x/y -- pass them as keywords
    # (also valid on older seaborn versions).
    sns.lmplot(x='time_of_day', y='Height',
               data=X,
               fit_reg=False,
               hue="clusters",
               scatter_kws={"marker": "D",
                            "s": 40,
                            "cmap": 'viridis'})
    plt.title('time_of_day vs Height')
    plt.xlabel('time_of_day')
    plt.ylabel('Height')
# Batch runs of the 2-D seasonal clustering over several house lists.
# NOTE(review): the first three batches are identical -- likely repeated
# notebook re-runs; kept to preserve behaviour.
# tied
houses_array = [2332, 89, 306, 4, 2551]
for i in houses_array:
    seasonal_spike_2d(i, 8)
# tied
houses_array = [2332, 89, 306, 4, 2551]
for i in houses_array:
    seasonal_spike_2d(i, 8)
houses_array = [2332, 89, 306, 4, 2551]
for i in houses_array:
    seasonal_spike_2d(i, 8)
houses_array = [2732, 916, 4, 2705, 1028, 1577]
for i in houses_array:
    seasonal_spike_2d(i, 8)
# Bug fix: the line below is a shell command, which is a SyntaxError inside a
# Python file -- run it from a terminal instead:
# ipython nbconvert --to html Season_clustering_detect100houses-Copy1.ipynb